In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

In [ ]:
from scipy.linalg import cholesky
from sklearn.metrics import pairwise_kernels

In [ ]:
from utils.functions import gaussian

In [ ]:
from scipy.linalg import inv
from joblib import Parallel, delayed

In [ ]:
import os, time

In [ ]:
def mkdirifnot(path):
    """Create directory `path` if it does not exist and return `path`.

    Uses EAFP instead of the exists()/mkdir() pair to avoid the race
    where the directory appears (or disappears) between the check and
    the creation.
    """
    try:
        os.mkdir(path)
    except OSError:
        # Swallow the error only when the directory already exists
        # (previous run or concurrent creation); anything else is real.
        if not os.path.isdir(path):
            raise
    return path

In [ ]:
## Experiment root; the "output" subfolder is created on demand.
BASE_PATH = "./conf_prec_sel"
OUTPUT_PATH = mkdirifnot(os.path.join(BASE_PATH, "output"))

In [ ]:
## Miscoverage levels alpha, reversed to run from 25% down to 1%.
levels = np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]
## Pretty-printed percent labels used in plot titles / legends.
levels_ = pd.Index(["%2.1f"%(100*lvl,) for lvl in levels], name="alpha")

In [ ]:
def loo_intervals(X, y, nugget=1.0, proc=None, levels=(0.05,), n_jobs=1, metric=None, **kwargs):
    """Compute leave-one-out conformal intervals for every sample in `X`.

    Parameters
    ----------
    X : array, shape (n, d) -- inputs used to build the kernel matrix.
    y : array, shape (n, 1) -- targets.
    nugget : float -- ridge added to the kernel diagonal for stability.
    proc : callable -- conformal procedure applied per point (e.g. RRCM);
        called as proc(a, b, levels=..., n=...).
    levels : tuple -- miscoverage levels forwarded to `proc`.
    n_jobs : int -- joblib parallelism.
    metric, **kwargs : forwarded to `sklearn.metrics.pairwise_kernels`.

    Returns
    -------
    array, shape (n, len(levels), 2) -- convex hull (lo, hi) of each
    confidence region.
    """
    Kxx = np.asfortranarray(pairwise_kernels(X, metric=metric, **kwargs))
    Kxx.flat[::Kxx.shape[0]+1] += nugget
## Bind the result: `overwrite_a=True` is only a performance hint to scipy
##  and the inverse is NOT guaranteed to be written back into the input.
    Kxx = inv(Kxx, overwrite_a=True, check_finite=False)
## Get the loo-residuals A and B matrices
    A, B = np.dot(Kxx, y) - Kxx * y.T, Kxx
## Run the conformal procedure on each column of A-B
    d_proc = delayed(proc)
    parallel_ = Parallel(n_jobs=n_jobs, verbose=0)
    results_ = parallel_(d_proc(A[:, n], B[:, n],
                                levels=levels, n=n)
                         for n in xrange(X.shape[0]))
## Return the convex hull of each confidence region
    return np.array([[(res_[0, 0], res_[-1, -1]) for res_ in result_]
                     for result_ in results_])

In [ ]:
from sklearn.cross_validation import train_test_split
## Fixed seed so the sampled noise / functions are reproducible.
random_state = np.random.RandomState(0x4A04B766)

In [ ]:
from utils.functions_1d import triangle

In [ ]:
## theta0: kernel precision for the (commented-out) GP sample; noise:
##  observation-noise scale; size: number of grid points.
theta0, noise, size = 100.0, 5e-1, 251
metric = 'rbf' # 'laplacian'
# X = np.linspace(-1, 1, num=size).reshape((-1, 1))
# y = gaussian(X, random_state=random_state, metric=metric, nugget=noise, size=(1,), gamma=theta0)
X = np.linspace(-.5, 1.5, num=size).reshape((-1, 1))
y = triangle(X)
## Five independent noisy replicates of the triangle target, one per column.
y = y + random_state.normal(size=(y.shape[0], 5)) * noise

Compute the GPR and conformal loo-interval widths


In [ ]:
## NOTE(review): `Mi_` is only defined in a cell further below -- this cell
##  fails on a fresh "Restart & Run All"; run it after the fitting loop.
np.allclose(Mi_.reshape((1, -1)), Mi_[np.newaxis])

In [ ]:
from utils.conformal import RRCM
## Ridge added to the kernel diagonal; grid of kernel precisions to scan.
nugget_ = 1e-6
theta_grid = np.logspace(-2.0, 5.0, num=71)
# theta_grid = np.logspace(-5.0, 7.0, num=73)

## For every theta collect the conformal (A, B) matrices, the loo standard
##  deviations and the loo predictions; all are stacked along a theta axis.
loo_sigma_, A, B, y_hat_ = list(), list(), list(), list()
for theta_ in theta_grid:
    Kxx_ = np.asfortranarray(pairwise_kernels(X, metric=metric, gamma=theta_))
    Kxx_.flat[::Kxx_.shape[0] + 1] += nugget_
    ## NOTE(review): the return value of `inv` is discarded -- this relies
    ##  on `overwrite_a=True` inverting Kxx_ in place, which scipy does not
    ##  guarantee; prefer `Kxx_ = inv(...)`.
    inv(Kxx_, overwrite_a=True, check_finite=False)

    ## Diagonal of the inverse: reciprocal leave-one-out variances.
    Mi_ = np.diag(Kxx_).reshape((1, -1))
    if True:
        ## Leave-one-out decomposition: A_ - B_ * y yields the loo residual
        ##  of each point as a function of its own (hypothetical) target.
        yQM_ = np.dot(y.T, Kxx_) / Mi_
        B_ = Kxx_ / Mi_
        A_ = yQM_[:, np.newaxis] - B_[np.newaxis] * y[np.newaxis].T
        hat_ = y.T - yQM_
    else:
        ## Dead branch kept for reference: non-loo (ridge) residuals.
        yQM_ = np.dot(y.T, Kxx_)
        B_ = Kxx_
        A_ = yQM_[:, np.newaxis] - B_[np.newaxis] * y[np.newaxis].T
        hat_ = y.T - nugget_ * yQM_

    ## Plug-in noise-variance estimate y' K^{-1} y / n, rescaled per point.
    sigma2_ = np.diag(np.dot(y.T, np.dot(Kxx_, y))) / Kxx_.shape[0]
    loo_sigma_.append(sigma2_.reshape((-1, 1)) / Mi_)

    A.append(A_)
    B.append(B_)
    y_hat_.append(hat_) # y_hat_.append(y.T - nugget_ * np.dot(y.T, Kxx_))
A, B = np.stack(A, axis=1), np.stack(B, axis=0)
## Note: loo_sigma_ holds standard deviations from here on (sqrt applied).
loo_sigma_ = np.sqrt(np.stack(loo_sigma_, axis=1))
y_hat_ = np.stack(y_hat_, axis=1)

In [ ]:
## Quick look at the loo predictions for replicate 0 at theta_grid[69].
# plt.plot(y - nugget_ * np.dot(Kxx_, y))
plt.plot(y_hat_[0, 69].T);

In [ ]:
## Sanity check: the vectorized A matrices (for the last theta_ computed)
##  coincide with a direct per-replicate computation.
for j in range(y.shape[1]):
    MM_ = 1.0 / np.diag(Kxx_).reshape((-1, 1))
    AA_ = MM_ * (np.dot(Kxx_, y[:, j, np.newaxis]) - Kxx_ * y[:, j, np.newaxis].T)
    assert np.allclose(A[j, -1], AA_.T)

In [ ]:
## Run the conformal procedure on each column of A-B
## One RRCM call per (replicate r, kernel precision t, sample point n).
parallel_ = Parallel(n_jobs=-1, verbose=0)
results_ = parallel_(delayed(RRCM)(A[r, t, n], B[t, n], levels=levels, n=n)
                     for r in xrange(y.shape[1])
                         for t in xrange(len(theta_grid))
                             for n in xrange(X.shape[0]))

In [ ]:
## Collapse each confidence region to its convex hull (lo, hi) and reshape
##  back into (replicate, theta, point, level, 2).
intervals_ = np.reshape(np.array([(res_[0, 0], res_[-1, -1]) for result_ in results_ for res_ in result_]),
                        (y.shape[1], len(theta_grid), X.shape[0], len(levels), 2))

In [ ]:
## Empirical coverage check: the worst-case gap between the fraction of
##  points outside their interval and the nominal miscoverage `levels`.
y_ = y.T.reshape(y.shape[1], 1, y.shape[0], 1)
mask_ = (intervals_[..., 0] <= y_) & (y_ <= intervals_[..., 1])
print np.max(np.abs(1 - np.mean(mask_, axis=-2) - levels), axis=1).max(axis=0)

Create the color scheme


In [ ]:
## Indices of sample points highlighted in the figures below.
points_ = [110, 125, 142, 189]
# [90, 101, 123, 124, 126, 142, 159] # [180, 203, 246, 249, 252, 285, 318] #[50, 100, 150, 200]
colors_ = plt.cm.rainbow(np.linspace(0, 1, num=len(points_)))

## Output file pattern: tri_<size>_<noise>_<nugget>_<figure-name>.pdf
filename_template_ = "./tri_%d_%g_%g_%%s.pdf"%(size, noise, nugget_,)

Compare the summarized widths.


In [ ]:
## Plot the first noisy replicate with the highlighted points marked.
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
ax.plot(X, y[:, 0], c="b")
for j, p in enumerate(points_):
    ax.axvline(x=X[p], c=colors_[j], linestyle="-", marker="o")
ax.set_title("Sample path of the studied function")
ax.set_xlabel("x")
ax.set_ylabel("$y_x$")
fig.savefig(filename_template_%("sample",), dpi=120)
plt.show()

In [ ]:
from scipy.stats import norm
def demo_gpr(ax, a=-1, adaptive=True):
    """Plot Gaussian (GPR) loo-intervals across theta for selected points.

    Parameters
    ----------
    ax : matplotlib axes to draw on (returned at the end).
    a : int -- index into `levels` / `levels_` picking the confidence level.
    adaptive : bool -- if True, plot the interval band centered at the loo
        prediction (residual view); otherwise plot absolute intervals.

    Relies on the notebook globals `points_`, `colors_`, `levels`,
    `levels_`, `theta_grid`, `loo_sigma_`, `y_hat_` and `y`.
    """
    ax.set_xscale("log")
    for j, p in enumerate(points_):
        # Two-sided normal quantile for the selected level.
        z_a = norm.ppf(1 - .5 * levels)[a]
        if adaptive:
            ax.plot(theta_grid, loo_sigma_[0, :, p] * z_a,
                    c=colors_[j], marker="v", markersize=3)
            ax.plot(theta_grid, -loo_sigma_[0, :, p] * z_a,
                    c=colors_[j], marker="^", markersize=3)
            ax.plot(theta_grid, y[p, 0] - y_hat_[0, :, p], c=colors_[j],
                    linestyle="-", label="$y_{x_{%d}} - \\hat{y}_{\\theta}(x_{%d})$"%(j, j,), alpha=0.5)
        else:
            ax.plot(theta_grid, loo_sigma_[0, :, p] * z_a + y_hat_[0, :, p],
                    c=colors_[j], marker="v", markersize=3)
            ax.plot(theta_grid, -loo_sigma_[0, :, p] * z_a + y_hat_[0, :, p],
                    c=colors_[j], marker="^", markersize=3)
            ax.axhline(y=y[p, 0], c=colors_[j], linestyle="-", label="$y_{x_{%d}}$"%(j,), alpha=0.5)
    if adaptive:
        ax.set_title("prediction-centered %s%% GPR LOO-intervals for %d arbitrary points"%(levels_[a], len(points_)))
    else:
        ax.set_title("%s%% GPR LOO-intervals for %d arbitrary points"%(levels_[a], len(points_)))
    ax.set_xlabel("Gaussian kernel precision $\\theta$ : $K(x,x') = \\mathtt{exp}\\{-\\theta\\|x-x'\\|^2\\}$")
    ax.set_ylabel("Target $y_x$")
    ax.legend(loc="best", ncol=2)
    ax.grid()
    return ax

In [ ]:
from scipy.stats import norm
def demo_conf(ax, a=-1, adaptive=True, plot_gpr=True):
    """Plot RRCM conformal loo-intervals across theta for selected points.

    Same layout and conventions as `demo_gpr`; `plot_gpr` is currently
    unused. Relies on the notebook globals `points_`, `colors_`,
    `levels_`, `theta_grid`, `intervals_`, `y_hat_` and `y`.
    """
    ax.set_xscale("log")
    for j, p in enumerate(points_):
        if adaptive:
            ax.plot(theta_grid, intervals_[0, :, p, a, 1] - y_hat_[0, :, p],
                    c=colors_[j], marker="v", markersize=3)
            ax.plot(theta_grid, intervals_[0, :, p, a, 0] - y_hat_[0, :, p],
                    c=colors_[j], marker="^", markersize=3)
            ax.plot(theta_grid, y[p, 0] - y_hat_[0, :, p], c=colors_[j],
                    linestyle="-", label="$y_{x_{%d}} - \\hat{y}_{\\theta}(x_{%d})$"%(j, j,), alpha=0.5)
        else:
            ax.plot(theta_grid, intervals_[0, :, p, a, 1],
                    c=colors_[j], marker="v", markersize=3)
            ax.plot(theta_grid, intervals_[0, :, p, a, 0],
                    c=colors_[j], marker="^", markersize=3)
            ax.axhline(y=y[p, 0], c=colors_[j], linestyle="-", label="$y_{x_{%d}}$"%(j,), alpha=0.5)
    if adaptive:
        ax.set_title("prediction-centered %s%% RRCM LOO-intervals for %d arbitrary points"%(levels_[a], len(points_)))
    else:
        ax.set_title("%s%% RRCM LOO-intervals for %d arbitrary points"%(levels_[a], len(points_)))
    ax.set_xlabel("Gaussian kernel precision $\\theta$ : $K(x,x') = \\mathtt{exp}\\{-\\theta\\|x-x'\\|^2\\}$")
    ax.set_ylabel("Target $y_x$")
    ax.legend(loc="best", ncol=2)
    ax.grid()
    return ax

In [ ]:
## Side-by-side RRCM vs GPR intervals at level index a=2, non-adaptive view.
a, adaptive = 2, False
fig = plt.figure(figsize=(16, 9))
ax = demo_conf(fig.add_subplot(121), a=a, adaptive=adaptive)
ax = demo_gpr(fig.add_subplot(122, sharey=ax, sharex=ax), a=a, adaptive=adaptive)
fig.savefig(filename_template_%("intervals",), dpi=120)
plt.show()


In [ ]:
from utils.state import _load

In [ ]:
## Load pre-computed experiment results; each element unpacks (below) as
##  (noise, size, nugget, loo-flag, (X, y, y_hat, gpr-, rrcm-intervals)).
# exp_triangle = _load("./conf_prec_sel/exp_triangle_1d0001-20160531_124611.gz")
# exp_triangle = _load("./conf_prec_sel/exp_triangle_1d0001-20160531_160409.gz")
# exp_triangle = _load("./conf_prec_sel/exp_triangle_1d0001-20160531_170831.gz")
exp_triangle = list()
for file_ in ['./conf_prec_sel/exp_triangle_1d0001-20160531_173311.gz', 
              './conf_prec_sel/exp_triangle_1d0002-20160531_173805.gz']:
    exp_triangle.extend(_load(file_))

In [ ]:
def demo_int(intervals, y, y_hat, name, points, ax, a=-1, adaptive=True, plot_gpr=True):
    """Plot loo-intervals (GPR or RRCM) across theta for selected points.

    Parameters
    ----------
    intervals : array (replicates, thetas, points, levels, 2) of (lo, hi).
    y : array (n, replicates); replicate 0 is the one plotted.
    y_hat : array (replicates, thetas, n) of predictions.
    name : str prefix (e.g. "RRCM ", "GPR ") inserted into the title.
    points : sample-point indices to highlight.
    ax, a, adaptive : as in `demo_gpr`; `plot_gpr` is unused.

    NOTE(review): the theta grid is hard-coded here and must match the
    grid used when the loaded experiments were produced -- confirm.
    """
    colors_ = plt.cm.rainbow(np.linspace(0, 1, num=len(points)))
    theta_grid = np.logspace(-2.0, 5.0, num=71)
    ax.set_xscale("log")
    for j, p in enumerate(points):
        if adaptive:
            ax.plot(theta_grid, intervals[0, :, p, a, 1] - y_hat[0, :, p],
                    c=colors_[j], marker="v", markersize=3)
            ax.plot(theta_grid, intervals[0, :, p, a, 0] - y_hat[0, :, p],
                    c=colors_[j], marker="^", markersize=3)
            ax.plot(theta_grid, y[p, 0] - y_hat[0, :, p], c=colors_[j],
                    linestyle="-", label="$y_{x_{%d}} - \\hat{y}_{\\theta}(x_{%d})$"%(j, j,), alpha=0.5)
        else:
            ax.plot(theta_grid, intervals[0, :, p, a, 1],
                    c=colors_[j], marker="v", markersize=3)
            ax.plot(theta_grid, intervals[0, :, p, a, 0],
                    c=colors_[j], marker="^", markersize=3)
            ax.axhline(y=y[p, 0], c=colors_[j], linestyle="-", label="$y_{x_{%d}}$"%(j,), alpha=0.5)
    if adaptive:
        ax.set_title("prediction-centered %s%% %sintervals for %d arbitrary points"%(levels_[a], name, len(points)))
    else:
        ax.set_title("%s%% %sintervals for %d arbitrary points"%(levels_[a], name, len(points)))
    ax.set_xlabel("Gaussian kernel precision $\\theta$ : $K(x,x') = \\mathtt{exp}\\{-\\theta\\|x-x'\\|^2\\}$")
    ax.set_ylabel("Target $y_x$")
    ax.legend(loc="best", ncol=2)
    ax.grid()
    return ax

In [ ]:
## Regenerate the sample-path and interval figures for every loaded
##  experiment, one output subfolder per (size, noise) combination.
for noise_, size_, nugget_, loo_, result_ in exp_triangle:
    X, y, y_hat_, gpr_intervals_, rrcm_intervals_ = result_
    
    filename_template_ = "./tri_%d_%g_%g_%%s.pdf"%(size_, noise_, nugget_,)
    title_template_ = "triangle ($n=%d$, $\\gamma=%g$): %%s"%(size_, noise_,)

    ## Highlighted points depend on the grid size of the experiment.
    if size_ == 101:
        points_ = [50, 37, 62,]
    elif size_ == 251:
        points_ = [110, 125, 142, 189]
    else:
        points_ = [190, 200, 249, 295, 318]

    output_path_ = mkdirifnot(os.path.join(OUTPUT_PATH, "%d"%(size_,)))
    output_path_ = mkdirifnot(os.path.join(output_path_, "%g"%(noise_,)))

    colors_ = plt.cm.rainbow(np.linspace(0, 1, num=len(points_)))
## Profile
    fig = plt.figure(figsize=(16, 9))
    ax = fig.add_subplot(111)
    ax.plot(X, y[:, 0], c="b")
    for j, p in enumerate(points_):
        ax.axvline(x=X[p], c=colors_[j], linestyle="-", marker="o")
    ax.set_title(title_template_%("sample path"))
    ax.set_xlabel("x")
    ax.set_ylabel("$y_x$")
    filename_ = os.path.join(output_path_, filename_template_%("sample",))
    fig.savefig(filename_, dpi=120)
    plt.close()

## Intervals
    a, adaptive = 2, False
    fig = plt.figure(figsize=(16, 9))
    ax = demo_int(rrcm_intervals_, y, y_hat_, "RRCM " + ("loo-" if loo_ else ""),
                  points_, fig.add_subplot(121), a=a, adaptive=adaptive)
    ax = demo_int(gpr_intervals_, y, y_hat_, "GPR " + ("loo-" if loo_ else ""),
                  points_, fig.add_subplot(122, sharey=ax, sharex=ax), a=a, adaptive=adaptive)
    filename_ = os.path.join(output_path_, filename_template_%(("loo-" if loo_ else "") + "intervals",))
    fig.savefig(filename_, dpi=120)
    plt.close()
#     break


In [ ]:
from scipy.stats import norm
## Two-sided normal quantiles for each miscoverage level.
z_a = norm.ppf(1 - .5 * levels)
loo_rrcm_width_ = intervals_[..., 1] - intervals_[..., 0]
## `loo_sigma_` already holds standard deviations (the sqrt was applied
##  when it was stacked above), so it must not be square-rooted a second
##  time: the Gaussian interval width is 2 * z_a * sigma. The second
##  section of this notebook uses `loo_sigma_` without sqrt accordingly.
loo_gpr_width_ = 2 * loo_sigma_[..., np.newaxis] * z_a.reshape((1, 1, 1, -1))

In [ ]:
## (replicate, theta, point, level) -- quick shape sanity check.
loo_rrcm_width_.shape

In [ ]:
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
colors_conf_ = plt.cm.rainbow(np.linspace( 0, 1, 4 )[::-1])

## Conformal and GPR: median width over replicates and sample points
for j, lvl_ in enumerate(levels_):
    ax.plot(theta_grid, np.median(np.median(loo_rrcm_width_[..., j], axis=-1), axis=0),
            c=colors_conf_[j], zorder=99, label="RRCM %s%%"%(lvl_,))

# Gaussian widths
for j, lvl_ in enumerate(levels_):
    ax.plot(theta_grid, np.median(np.median(loo_gpr_width_[..., j], axis=-1), axis=0),
            c="k", linestyle="-", zorder=-99, label="GPR %s%%"%(lvl_,))

ax.set_xscale("log") ; ax.set_xlabel("Isotropic Gaussian kernel precision ($\\theta$, log-scale)")
# ax.set_yscale("log") ;
ax.set_ylabel("Confidence region width (log)")
ax.legend(loc="best", ncol=2)
ax.set_title("Median width of predictive regions across all sample points")

In [ ]:
## Shape check repeated before the single-point plot below.
loo_rrcm_width_.shape

In [ ]:
## NOTE(review): depends on the loop variable `j` left over from an earlier
##  cell -- fragile under "Restart & Run All".
loo_rrcm_width_[0, :, 100, j].shape

In [ ]:
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
colors_conf_ = plt.cm.rainbow(np.linspace( 0, 1, 4 )[::-1])

## Conformal and GPR interval widths at a single sample point (index 100)
for j, lvl_ in enumerate(levels_):
    ax.plot(theta_grid, loo_rrcm_width_[0, :, 100, j],
            c=colors_conf_[j], zorder=99, label="RRCM %s%%"%(lvl_,))

# Gaussian widths
for j, lvl_ in enumerate(levels_):
    ax.plot(theta_grid, loo_gpr_width_[0, :, 100, j],
            c="k", linestyle="-", zorder=-99, label="GPR %s%%"%(lvl_,))

ax.set_xscale("log") ; ax.set_xlabel("Isotropic Gaussian kernel precision ($\\theta$, log-scale)")
# ax.set_yscale("log") ;
ax.set_ylabel("Confidence region width (log)")
ax.legend(loc="best", ncol=2)
## Title corrected: this figure shows one sample point, not a median over all.
ax.set_title("Width of predictive regions at sample point 100")

Inspect the leverage


In [ ]:
## NOTE(review): `leverage_` is not defined anywhere in this notebook --
##  this cell fails as-is; presumably it should hold a per-theta leverage
##  (hat-matrix diagonal) array of shape (len(theta_grid), n). TODO: restore.
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)

ax.plot(theta_grid, np.mean(leverage_, axis=1),
        c="k", zorder=99, label="mean")
ax.plot(theta_grid, np.max(leverage_, axis=1),
        c="k", marker="v", alpha=0.15, label="max")
ax.plot(theta_grid, np.min(leverage_, axis=1),
        c="k", marker="^", alpha=0.15, label="min")
ax.set_xscale("log")
ax.set_title("Leverage of the KRR")
ax.legend(loc="best")




Nemirovsky procedure


In [ ]:
import os, time

%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

from scipy.linalg import cholesky
from sklearn.metrics import pairwise_kernels

from scipy.linalg import inv
from joblib import Parallel, delayed

from utils.functions_1d import triangle
from utils.functions import gaussian

from utils.conformal import RRCM

from scipy.stats import norm

In [ ]:
## Same miscoverage levels as in the first part of the notebook.
levels = np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]
levels_ = pd.Index(["%2.1f"%(100*lvl,) for lvl in levels], name="alpha")

In [ ]:
## Fresh seed for the Nemirovsky-procedure experiment.
random_state = np.random.RandomState(0x4A08F766)

In [ ]:
theta0, size = 100.0, 251
metric = 'rbf' # 'laplacian'
# X = np.linspace(-1, 1, num=size).reshape((-1, 1))
X = np.linspace(-.5, 1.5, num=size).reshape((-1, 1))
## For each noise level draw 2 GP sample paths and 2 noisy triangle paths;
##  columns of `y` thus group four targets per noise level.
yy = list()
for noise_ in [1e-6, 1e-2, 5e-2, 1e-1, 5e-1]:
    y1 = gaussian(X, random_state=random_state, metric=metric, nugget=noise_, size=(2,), gamma=theta0)
    y2 = triangle(X) + random_state.normal(size=(X.shape[0], 2)) * noise_
    yy.append(np.concatenate([y1, y2], axis=-1))
y = np.concatenate(yy, axis=-1)

In [ ]:
nugget_ = 1e-6
theta_grid = np.logspace(-2.0, 5.0, num=211)

## Per theta: conformal (A, B) matrices, loo standard deviations, ridge
##  and loo predictions; all later stacked along a theta axis.
loo_sigma_, A, B, y_hat_, y_hat_loo_ = list(), list(), list(), list(), list()
for theta_ in theta_grid:
    Kxx_ = np.asfortranarray(pairwise_kernels(X, metric=metric, gamma=theta_))
    Kxx_.flat[::Kxx_.shape[0] + 1] += nugget_

    ## NOTE(review): relies on `overwrite_a=True` inverting in place; scipy
    ##  does not guarantee this -- prefer `Kxx_ = inv(...)`.
    inv(Kxx_, overwrite_a=True, check_finite=False)

    ## Diagonal of the inverse: reciprocal leave-one-out variances.
    Mi_ = np.diag(Kxx_).reshape((1, -1))
    sigma2_ = np.diag(np.dot(y.T, np.dot(Kxx_, y))) / Kxx_.shape[0]
    loo_sigma2_ = sigma2_.reshape((-1, 1)) / Mi_

    yQM_ = np.dot(y.T, Kxx_)
    hat_loo_ = y.T - yQM_ / Mi_
    ## Ridge-regression (non-loo) residual pieces, scaled by the nugget.
    yQM_ *= nugget_
    hat_ = y.T - yQM_
    B_ = nugget_ * Kxx_

    A.append(yQM_[:, np.newaxis] - B_[np.newaxis] * y[np.newaxis].T)
    loo_sigma_.append(np.sqrt(loo_sigma2_))
    y_hat_loo_.append(hat_loo_)
    y_hat_.append(hat_)
    B.append(B_)

A, B = np.stack(A, axis=1), np.stack(B, axis=0)
loo_sigma_ = np.stack(loo_sigma_, axis=1)
y_hat_loo_ = np.stack(y_hat_loo_, axis=1)
y_hat_ = np.stack(y_hat_, axis=1)

In [ ]:
## Run the conformal procedure on each column of A-B
## One RRCM call per (replicate r, kernel precision t, sample point n).
parallel_ = Parallel(n_jobs=-1, verbose=0)
results_ = parallel_(delayed(RRCM)(A[r, t, n], B[t, n], levels=levels, n=n)
                     for r in xrange(y.shape[1])
                     for t in xrange(len(theta_grid))
                     for n in xrange(X.shape[0]))

## Convex hull (lo, hi) of each region, reshaped back into
##  (replicate, theta, point, level, 2).
rrcm_intervals_ = np.reshape(np.array([(res_[0, 0], res_[-1, -1])
                                       for result_ in results_ for res_ in result_]),
                             (y.shape[1], len(theta_grid), X.shape[0], len(levels), 2))

In [ ]:
## Gaussian loo-intervals: loo prediction +/- z_a * loo standard deviation.
z_a = norm.ppf(1 - .5 * levels)
gpr_hwidth_ = loo_sigma_[..., np.newaxis] * z_a.reshape((1, 1, 1, -1))
gpr_intervals_ = np.stack([y_hat_loo_[..., np.newaxis] - gpr_hwidth_,
                           y_hat_loo_[..., np.newaxis] + gpr_hwidth_], axis=-1)

In [ ]:
def demo_int(intervals, y, y_hat, name, points, ax, a=-1, adaptive=True, plot_gpr=True):
    """Plot loo-intervals across theta for one replicate's selected points.

    Redefines (shadows) the earlier `demo_int`: here `intervals`, `y` and
    `y_hat` are already sliced down to a single replicate, and the global
    `theta_grid` is used instead of a hard-coded one. `plot_gpr` is unused.
    """
    colors_ = plt.cm.rainbow(np.linspace(0, 1, num=len(points)))
    ax.set_xscale("log")
    for j, p in enumerate(points):
        if adaptive:
            ax.plot(theta_grid, intervals[:, p, a, 1] - y_hat[:, p],
                    c=colors_[j], marker="v", markersize=3)
            ax.plot(theta_grid, intervals[:, p, a, 0] - y_hat[:, p],
                    c=colors_[j], marker="^", markersize=3)
            ax.plot(theta_grid, y[p] - y_hat[:, p], c=colors_[j],
                    linestyle="-", label="$y_{x_{%d}} - \\hat{y}_{\\theta}(x_{%d})$"%(j, j,), alpha=0.5)
        else:
            ax.plot(theta_grid, intervals[:, p, a, 1],
                    c=colors_[j], marker="v", markersize=3)
            ax.plot(theta_grid, intervals[:, p, a, 0],
                    c=colors_[j], marker="^", markersize=3)
            ax.plot(theta_grid, y_hat[:, p], c=colors_[j],
                    linestyle="-", alpha=0.75)
            ax.axhline(y=y[p], c=colors_[j], linestyle="-", label="$y_{x_{%d}}$"%(j,), alpha=0.5)
    if adaptive:
        ax.set_title("loo-prediction centered %s%% %sintervals for %d arbitrary points"%(levels_[a], name, len(points)))
    else:
        ax.set_title("%s%% %sintervals for %d arbitrary points"%(levels_[a], name, len(points)))
    ax.set_xlabel("Gaussian kernel precision $\\theta$ : $K(x,x') = \\mathtt{exp}\\{-\\theta\\|x-x'\\|^2\\}$")
    ax.set_ylabel("Target $y_x$")
    ax.legend(loc="best", ncol=2)
    ax.grid()
    return ax

In [ ]:
## Intervals
## Side-by-side RRCM vs GPR loo-intervals for replicate j at level
##  index a=-1, absolute (non-adaptive) view.
points_ = [110, 125, 142, 189]
j, a, adaptive = 2, -1, False

fig = plt.figure(figsize=(16, 9))
ax = demo_int(rrcm_intervals_[j], y[:, j], y_hat_loo_[j], "RRCM ",
              points_, fig.add_subplot(121), a=a, adaptive=adaptive)
ax = demo_int(gpr_intervals_[j], y[:, j], y_hat_loo_[j], "GPR ",
              points_, fig.add_subplot(122, sharey=ax, sharex=ax), a=a, adaptive=adaptive)
plt.show()

In [ ]:
## Sample path of replicate j with the highlighted points marked.
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
ax.plot(X, y[:, j], c="b")
# ax.plot(X, y[:, j, np.newaxis] - y_hat_loo_[j, :].T)
colors_ = plt.cm.rainbow(np.linspace(0, 1, num=len(points_)))
for c, x in zip(colors_, X[points_]):
    ax.axvline(x=x, c=c, linestyle="-", marker="o")
ax.set_title("Sample path of the studied function")
ax.set_xlabel("x")
ax.set_ylabel("$y_x$")

In [ ]:
## Compute the running intersection over the theta axis (1)
reverse = False
if not reverse:
    top_ = np.minimum.accumulate(rrcm_intervals_[..., 1], axis=1)
    bottom_ = np.maximum.accumulate(rrcm_intervals_[..., 0], axis=1)
else:
    top_ = np.minimum.accumulate(rrcm_intervals_[:, ::-1, ..., 1], axis=1)
    bottom_ = np.maximum.accumulate(rrcm_intervals_[:, ::-1, ..., 0], axis=1)

## Width of the running intersection; empty intersections are clamped to -1
##  so that argmin below finds the first theta where they vanish.
width_ = top_ - bottom_
width_[width_ < 0] = -1

## t - b is nonincreasing, that is why the argmin of |t-b| is either +\infty
## or the point of crossing the zero level (again over the theta axis(1)).
theta_index_ = np.argmin(width_, axis=1)

## Find the largest index (theta) below which all intervals of all sample
##  points have nonempty intersection.
theta_star = np.min(theta_index_, axis=1)
if reverse:
## Since we need the largest k, for which all intervals beyond it overlap
##  take the `max`.
## NOTE(review): `theta_index_.shape[1]` is the number of sample points,
##  not the length of the theta grid -- this offset looks wrong; confirm
##  against `width_.shape[1]`.
    theta_star = theta_index_.shape[1] - theta_star

In [ ]:
## Visualize the running-intersection width at sample point 125 (one line
##  per confidence level), re-reversed for display in the reverse case.
if reverse:
    plt.plot((width_[0, ::-1, 125]))
else:
    plt.plot((width_[0, :, 125]))

In [ ]:
## Simpler pick: per replicate, the median (over sample points) of the
##  theta index that minimizes the interval width at each point.
theta_star = np.median(np.argmin(rrcm_intervals_[..., 1] - rrcm_intervals_[..., 0], axis=1), axis=1).astype(int)

In [ ]:
## Compare sklearn's (deprecated) GaussianProcess MLE bandwidth with the
##  theta chosen from the conformal interval widths, per replicate.
from sklearn.gaussian_process import GaussianProcess

for j in range(y.shape[1]):
    gp = GaussianProcess(thetaL=1e-5, thetaU=1e+7, theta0=100.0, nugget=nugget_, normalize=False,
                         regr='constant', beta0=0, corr='squared_exponential').fit(X, y[:, j])
    plt.plot(y[:, j], c="r")
    plt.plot(gp.predict(X), c="b", label="MLE")

    ## Theta picked at the last level index for this replicate.
    theta_inx_ = theta_star[j, -1]
    plt.plot(y_hat_[j, theta_inx_], c="g", label="Nem")

#     gp_ = GaussianProcess(theta0=theta_, nugget=nugget_, normalize=False,
#                           regr='constant', beta0=0, corr='squared_exponential').fit(X, y[:, j])
#     plt.plot(gp_.predict(X), c="g")
    plt.legend(loc="best")
    plt.show()
    plt.close()
    print "Nemirovsky:\t%2.5g,\nGP:\t\t%2.5g\n"%(theta_grid[theta_inx_], gp.theta_[0])

In [ ]: